* Project root directory, taken from the first positional argument.
global root_dir = "`1'"

* Load the project configuration.
* NOTE(review): presumably this defines the log_dir and final_dir globals
* used further down - confirm against config.do.
include "$root_dir/code/config/config.do"


* Open a named log for this run. The file name is quoted so that a log
* directory containing spaces is still accepted. A failure to open the log
* is shown (noi) but does not stop the script (cap).
cap noi log using "${log_dir}/df_longdiff.log", replace name(dat)

* Positional arguments 2 to 5 may carry the literal placeholder ___EMPTY___,
* which stands for a missing value. Copy each of them into the globals
* arg1 to arg4, turning the placeholder into an empty string.
forvalues argno = 1/4 {
    local srcpos = `argno' + 1
    global arg`argno' = cond("``srcpos''" == "___EMPTY___", "", "``srcpos''")
}

* Override a setting only when a value was actually supplied, and echo the
* value that is now in effect.
if "${arg1}" != "" {
    global weight_category "${arg1}"
    display "Weight category: ${weight_category}"
}

if "${arg2}" != "" {
    global weight_versions "${arg2}"
    display "Weight versions: ${weight_versions}"
}

if "${arg3}" != "" {
    global weight_window "${arg3}"
    display "Weight window: ${weight_window}"
}

if "${arg4}" != "" {
    global wtype "${arg4}"
}
* The weight type is printed whether or not it was overridden.
display "${wtype}"

capture noi {
*************************************************************
*v3 Build dataset for long-difference exercise of table A 37*
*************************************************************
* Steps: load the regression dataset, keep the baseline-regression firms,
* build long differences of the right-hand-side variables (suffix _a) and
* of the arcsinh-transformed dependent variables, merge the result back
* onto the regression dataset and save it with the suffix _longdiff.
* Any error inside this block is shown but captured, so the code after the
* closing brace still runs.

*what year difference are we looking at
global window 5
*how many years to shift the window initially?
global shift 0
* default is window: 5 and shift: 0.

*load the regression dataset-> needs to be run after this
* File names are quoted throughout so that directories with spaces work.
use "${final_dir}/regression_dataset${weight_window}_${wtype}.dta", clear

*load the firms used in the baseline regression of table 5
* The merge indicator is spelled out in full (_merge) so that the filter
* does not depend on variable-name abbreviation being enabled or unique.
mmerge BvD using "${final_dir}/bvd_list_regfirms_auto95.dta", unmatched(master)
keep if _merge == 3

* Calculate long-differences of RHS variables: each variable ending in _a
* minus its own value pwin years earlier (pwin is 4 with the defaults).
gen ispMPm_1995_a = lswMPm_1995_a - hswMPm_1995_a
gen ispMPm_shr4_foreign_1995_a = lswMPm_shr4_foreign_1995_a - hswMPm_shr4_foreign_1995_a
* NOTE(review): the L. and F. operators below need the data declared as a
* panel (xtset or tsset); presumably the loaded dataset carries that
* setting - confirm. The sort puts the rows in panel and year order.
sort lse_id year

*shifts
local initwin = 1 - ${shift} // default: 1
local pwin = ${window} - `initwin' // default: 4
local lwin = ${window} + `initwin' // default: 6

*shift variables
* The level variable is dropped once its difference has been created.
foreach var of varlist *_a {
	gen D${window}`var' = `var' - L`pwin'.`var' 
	drop `var'
}

* NOTE(review): the ttt global used in the variable names below is not set
* in this section; presumably it comes from config.do - confirm.
foreach x in auto90 auto95 { 
	global depvar "`x'"
	* Running sum of the dependent variable within firm, ordered by year.
	* Differences of this running sum give the sum over a window of years.
	bys lse_id (year): gen ts${depvar}_${ttt} = sum(${depvar}_${ttt})
	gen F${window}_${depvar}_${ttt} = F`pwin'.ts${depvar}_${ttt} - L`initwin'.ts${depvar}_${ttt} // default: sum from t+4 to t
	gen L${window}_${depvar}_${ttt} = L`initwin'.ts${depvar}_${ttt} - L`lwin'.ts${depvar}_${ttt}  // default sum from t-5 to t-1

	* Make arcsinh and diff transformation of depvar
	* log(x + sqrt(x^2 + 1)) is the inverse hyperbolic sine of x.
	gen arcsinh_F${window}_${depvar}_${ttt}=log(F${window}_${depvar}_${ttt}+((F${window}_${depvar}_${ttt})^2+1)^(1/2))
	gen arcsinh_L${window}_${depvar}_${ttt}=log(L${window}_${depvar}_${ttt}+((L${window}_${depvar}_${ttt})^2+1)^(1/2))
	gen diff_F${window}L${window}_${depvar}_${ttt} = arcsinh_F${window}_${depvar}_${ttt}-arcsinh_L${window}_${depvar}_${ttt}
}

* Merge the transformed variables with the original dataset, keep what we
* need, save. The first keep also removes the _merge variable left by the
* firm-list merge, which the merge command below would otherwise reject.
keep year BvD diff_F${window}* D${window}*
merge 1:1 BvD year using "${final_dir}/regression_dataset${weight_window}_${wtype}.dta"
keep if _merge == 3
keep year BvD lse_id auto95_bia auto90_bia diff* D* industry country_shr_1995 missing_weights_1995 missing_spill_weights_1995 maxweight_1995
save "${final_dir}/regression_dataset${weight_window}_${wtype}_longdiff.dta", replace

}
* Remember the return code of the captured block before any other command
* can overwrite it, then report the outcome.
local build_rc = _rc
if `build_rc' != 0 {
    display "Execution finished with errors."
}
else {
    display "Execution finished successfully."
}

* Close the named log; the error is ignored if that log is not open.
capture log close dat